In [1]:
import pandas as pd
df = pd.read_csv('RRR.L 2.csv')
print(df.head())
print(df.describe())
print(df.isnull().sum())
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
Date Close
0 1/1/2019 0.575
1 1/2/2019 0.600
2 1/3/2019 0.600
3 1/4/2019 0.525
4 1/5/2019 0.575
Close
count 60.000000
mean 0.483467
std 0.253323
min 0.105000
25% 0.308750
50% 0.475000
75% 0.581250
max 1.100000
Date 0
Close 0
dtype: int64
Activity 1¶
Exploratory Data Analysis and Visualization¶
In [9]:
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
plt.plot(df['Close'], label='Close Price')
plt.title('Time Series of Close Price')
plt.xlabel('Date')
plt.ylabel('Close Price')
for i, (date, close) in enumerate(df['Close'].items()):
if i % 30 == 0:
plt.annotate(f'{close:.2f}', (date, close), textcoords="offset points", xytext=(0, 10), ha='center')
plt.legend()
plt.show()
In [8]:
import matplotlib.pyplot as plt
rolling_mean = df['Close'].rolling(window=12).mean()
rolling_std = df['Close'].rolling(window=12).std()
plt.figure(figsize=(12, 6))
plt.plot(df['Close'], color='blue', label='Close Price')
plt.plot(rolling_mean, color='red', label='Rolling Mean')
plt.plot(rolling_std, color='black', label='Rolling Std Dev')
for x, y in zip(df.index, df['Close']):
plt.text(x, y, f'{y:.2f}', fontsize=8, ha='center', va='bottom', color='blue')
for x, y in zip(df.index, rolling_mean):
if not pd.isna(y):
plt.text(x, y, f'{y:.2f}', fontsize=8, ha='center', va='bottom', color='red')
for x, y in zip(df.index, rolling_std):
if not pd.isna(y):
plt.text(x, y, f'{y:.2f}', fontsize=8, ha='center', va='bottom', color='black')
plt.title('Rolling Mean & Standard Deviation')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(df['Close'], ax=ax, lags=40, alpha=0.05)
for i in range(1, 41):
y = np.corrcoef(df['Close'][:-i], df['Close'][i:])[0, 1]
ax.annotate(f'{y:.2f}', xy=(i, y), xytext=(i, y + 0.05),
arrowprops=dict(facecolor='black', arrowstyle='->'),
fontsize=9, ha='center')
plt.title('Autocorrelation Plot of Close Price')
plt.xlabel('Lags')
plt.ylabel('Autocorrelation')
plt.show()
Time Series Decomposition¶
In [11]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from statsmodels.tsa.seasonal import seasonal_decompose
result = seasonal_decompose(df['Close'], model='additive', period=12)
observed = result.observed
trend = result.trend
seasonal = result.seasonal
resid = result.resid
# Create subplots
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, subplot_titles=['Observed', 'Trend', 'Seasonal', 'Residual'])
fig.add_trace(go.Scatter(x=observed.index, y=observed, mode='lines', name='Observed'), row=1, col=1)
fig.add_trace(go.Scatter(x=trend.index, y=trend, mode='lines', name='Trend'), row=2, col=1)
fig.add_trace(go.Scatter(x=seasonal.index, y=seasonal, mode='lines', name='Seasonal'), row=3, col=1)
fig.add_trace(go.Scatter(x=resid.index, y=resid, mode='lines', name='Residual'), row=4, col=1)
fig.update_layout(height=800, title='Time Series Decomposition', showlegend=False)
fig.show()
Naive method¶
In [14]:
naive_forecast = df['Close'].shift(1)
# Plot actual vs forecast
plt.figure(figsize=(10, 6))
plt.plot(df['Close'], label='Actual')
plt.plot(naive_forecast, label='Naive Forecast', linestyle='--')
plt.title('Naive Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
# Calculate accuracy metrics for Naive Method
from sklearn.metrics import mean_absolute_error, mean_squared_error
naive_mae = mean_absolute_error(df['Close'][1:], naive_forecast[1:])
naive_mse = mean_squared_error(df['Close'][1:], naive_forecast[1:])
naive_rmse = np.sqrt(naive_mse)
print(f'Naive Method MAE: {naive_mae}')
print(f'Naive Method MSE: {naive_mse}')
print(f'Naive Method RMSE: {naive_rmse}')
Naive Method MAE: 0.06966101694915253 Naive Method MSE: 0.0107091186440678 Naive Method RMSE: 0.10348487157100693
Average Historical Method¶
In [15]:
# Average historical method
average_forecast = [df['Close'][:i].mean() for i in range(1, len(df)+1)]
# Plot actual vs forecast
plt.figure(figsize=(10, 6))
plt.plot(df['Close'], label='Actual')
plt.plot(df.index, average_forecast, label='Average Historical Forecast', linestyle='--')
plt.title('Average Historical Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
# Calculate accuracy metrics for Average Historical Method
average_mae = mean_absolute_error(df['Close'], average_forecast)
average_mse = mean_squared_error(df['Close'], average_forecast)
average_rmse = np.sqrt(average_mse)
print(f'Average Historical Method MAE: {average_mae}')
print(f'Average Historical Method MSE: {average_mse}')
print(f'Average Historical Method RMSE: {average_rmse}')
Average Historical Method MAE: 0.20028654805085913 Average Historical Method MSE: 0.06101835414212262 Average Historical Method RMSE: 0.2470189347846084
Activity 2¶
Time Series Decomposition¶
In [20]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
result_additive = seasonal_decompose(df['Close'], model='additive', period=12)
result_multiplicative = seasonal_decompose(df['Close'], model='multiplicative', period=12)
In [21]:
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
result_additive.trend.plot(ax=plt.gca(), title='Additive Decomposition')
plt.ylabel('Trend')
plt.subplot(2, 1, 2)
result_additive.seasonal.plot(ax=plt.gca())
plt.ylabel('Seasonal')
plt.tight_layout()
plt.show()
In [22]:
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
result_multiplicative.trend.plot(ax=plt.gca(), title='Multiplicative Decomposition')
plt.ylabel('Trend')
plt.subplot(2, 1, 2)
result_multiplicative.seasonal.plot(ax=plt.gca())
plt.ylabel('Seasonal')
plt.tight_layout()
plt.show()
In [ ]:
In [ ]:
In [ ]:
Simple Average Method¶
In [24]:
simple_average_forecast = df['Close'].mean()
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.axhline(y=simple_average_forecast, color='r', linestyle='--', label='Simple Average Forecast')
plt.title('Simple Average Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
from sklearn.metrics import mean_absolute_error, mean_squared_error
simple_average_mae = mean_absolute_error(df['Close'], [simple_average_forecast] * len(df))
simple_average_mse = mean_squared_error(df['Close'], [simple_average_forecast] * len(df))
simple_average_rmse = np.sqrt(simple_average_mse)
print(f'Simple Average Method MAE: {simple_average_mae}')
print(f'Simple Average Method MSE: {simple_average_mse}')
print(f'Simple Average Method RMSE: {simple_average_rmse}')
Simple Average Method MAE: 0.19454666666666662 Simple Average Method MSE: 0.06310288222222223 Simple Average Method RMSE: 0.2512028706488487
In [ ]:
Exponential Smoothing methods¶
In [29]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
In [31]:
# Single Exponential Smoothing
ses_model = ExponentialSmoothing(df['Close']).fit(optimized=True)
ses_forecast = ses_model.forecast(len(df))
# Holt's Linear Exponential Smoothing
holt_model = ExponentialSmoothing(df['Close'], trend='add').fit(optimized=True)
holt_forecast = holt_model.forecast(len(df))
# Holt-Winters Exponential Smoothing
hw_model = ExponentialSmoothing(df['Close'], trend='add', seasonal='add', seasonal_periods=12).fit(optimized=True)
hw_forecast = hw_model.forecast(len(df))
In [32]:
# Plot actual vs forecast - Single Exponential Smoothing
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(df.index, ses_forecast, label='SES Forecast', linestyle='--')
plt.title('SES Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
In [34]:
# Holt's Linear Exponential Smoothing
holt_model = ExponentialSmoothing(df['Close'], trend='add').fit(optimized=True)
holt_forecast = holt_model.forecast(len(df))
# Plot actual vs forecast - Holt's Linear Exponential Smoothing
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(df.index, holt_forecast, label="Holt's Linear Forecast", linestyle='--')
plt.title("Holt's Linear Forecast vs Actual")
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
In [35]:
# Holt-Winters Exponential Smoothing
hw_model = ExponentialSmoothing(df['Close'], trend='add', seasonal='add', seasonal_periods=12).fit(optimized=True)
hw_forecast = hw_model.forecast(len(df))
# Plot actual vs forecast - Holt-Winters Exponential Smoothing
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(df.index, hw_forecast, label="Holt-Winters Forecast", linestyle='--')
plt.title("Holt-Winters Forecast vs Actual")
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
In [36]:
# Calculatng accuracy metrics for Exponential Smoothing Methods
ses_mae = mean_absolute_error(df['Close'], ses_forecast)
ses_mse = mean_squared_error(df['Close'], ses_forecast)
ses_rmse = np.sqrt(ses_mse)
holt_mae = mean_absolute_error(df['Close'], holt_forecast)
holt_mse = mean_squared_error(df['Close'], holt_forecast)
holt_rmse = np.sqrt(holt_mse)
hw_mae = mean_absolute_error(df['Close'], hw_forecast)
hw_mse = mean_squared_error(df['Close'], hw_forecast)
hw_rmse = np.sqrt(hw_mse)
print(f'SES Method MAE: {ses_mae}')
print(f'SES Method MSE: {ses_mse}')
print(f'SES Method RMSE: {ses_rmse}')
print(f"Holt's Linear Method MAE: {holt_mae}")
print(f"Holt's Linear Method MSE: {holt_mse}")
print(f"Holt's Linear Method RMSE: {holt_rmse}")
print(f'Holt-Winters Method MAE: {hw_mae}')
print(f'Holt-Winters Method MSE: {hw_mse}')
SES Method MAE: 0.3784666665226221 SES Method MSE: 0.20633989988720813 SES Method RMSE: 0.4542465188498511 Holt's Linear Method MAE: 0.6223080030322905 Holt's Linear Method MSE: 0.4385011076389994 Holt's Linear Method RMSE: 0.6621941615863125 Holt-Winters Method MAE: 0.6659882542623528 Holt-Winters Method MSE: 0.49333830188703975
Activity 3¶
Time Series Stationarity Test and Differencing¶
In [49]:
from statsmodels.tsa.stattools import adfuller
result = adfuller(df['Close'])
print('ADF Statistic (Original Series):', result[0])
print('p-value (Original Series):', result[1])
print('Critical Values:')
for key, value in result[4].items():
print('\t', key, ':', value)
if result[1] > 0.05:
try:
df['Close_diff'] = df['Close'].diff().dropna()
result_diff = adfuller(df['Close_diff'])
print('ADF Statistic (After differencing):', result_diff[0])
print('p-value (After differencing):', result_diff[1])
except Exception as e:
print("E:", e)
ADF Statistic (Original Series): -1.2420817124279229 p-value (Original Series): 0.6552364663643981 Critical Values: 1% : -3.5463945337644063 5% : -2.911939409384601 10% : -2.5936515282964665 E: exog contains inf or nans
In [ ]:
ACF and PACF¶
In [63]:
from statsmodels.tsa.stattools import pacf, acf
import matplotlib.pyplot as plt
acf_values = acf(df['Close_diff'].dropna(), nlags=20)
pacf_values = pacf(df['Close_diff'].dropna(), nlags=20)
plt.figure(figsize=(12, 6))
plt.subplot(2, 1, 1)
plt.plot(range(len(acf_values)), acf_values, marker='o', linestyle='-', color='red')
plt.title('Autocorrelation Function (ACF)')
plt.xlabel('Lag')
plt.ylabel('ACF')
plt.grid(True)
for i, txt in enumerate(acf_values):
plt.annotate(f'{txt:.2f}', (i, acf_values[i]), textcoords="offset points", xytext=(0,10), ha='center')
plt.subplot(2, 1, 2)
plt.plot(range(len(pacf_values)), pacf_values, marker='o', linestyle='-', color='blue')
plt.title('Partial Autocorrelation Function (PACF)')
plt.xlabel('Lag')
plt.ylabel('PACF')
plt.grid(True)
for i, txt in enumerate(pacf_values):
plt.annotate(f'{txt:.2f}', (i, pacf_values[i]), textcoords="offset points", xytext=(0,10), ha='center')
plt.tight_layout()
plt.show()
ARIMA Model and forecast¶
In [65]:
from statsmodels.tsa.arima.model import ARIMA
p = 1 # AR term
d = 1 # Differencing
q = 1 # MA term
model = ARIMA(df['Close'], order=(p, d, q)).fit()
print(model.summary())
SARIMAX Results
==============================================================================
Dep. Variable: Close No. Observations: 60
Model: ARIMA(1, 1, 1) Log Likelihood 50.188
Date: Mon, 03 Jun 2024 AIC -94.377
Time: 19:36:43 BIC -88.144
Sample: 0 HQIC -91.944
- 60
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.1823 2.664 -0.068 0.945 -5.403 5.039
ma.L1 0.2312 2.653 0.087 0.931 -4.969 5.431
sigma2 0.0107 0.001 9.589 0.000 0.008 0.013
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 95.78
Prob(Q): 0.92 Prob(JB): 0.00
Heteroskedasticity (H): 0.38 Skew: 1.81
Prob(H) (two-sided): 0.04 Kurtosis: 8.08
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [69]:
forecast_period = 10
forecast = model.forecast(steps=forecast_period)
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(pd.date_range(start=df.index[-1], periods=forecast_period + 1, freq='M')[1:], forecast, label='ARIMA Forecast', linestyle='--')
plt.title('ARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
In [75]:
arima_forecast = model.forecast(steps=forecast_period)
arima_mae = mean_absolute_error(df['Close'][-forecast_period:], arima_forecast)
arima_mse = mean_squared_error(df['Close'][-forecast_period:], arima_forecast)
arima_rmse = np.sqrt(arima_mse)
print(f'ARIMA Method MAE: {arima_mae}')
print(f'ARIMA Method MSE: {arima_mse}')
print(f'ARIMA Method RMSE: {arima_rmse}')
ARIMA Method MAE: 0.07653107886887411 ARIMA Method MSE: 0.010287655980626299 ARIMA Method RMSE: 0.10142808280070317
Sarima model and forecast¶
In [71]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
In [72]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
p = 1 # AR order
d = 1 # Differencing
q = 1 # MA order
P = 1 # Seasonal AR order
D = 1 # Seasonal differencing
Q = 1 # Seasonal MA order
m = 12
sarima_model = SARIMAX(df['Close'], order=(p, d, q), seasonal_order=(P, D, Q, m)).fit()
print(sarima_model.summary())
SARIMAX Results
==========================================================================================
Dep. Variable: Close No. Observations: 60
Model: SARIMAX(1, 1, 1)x(1, 1, 1, 12) Log Likelihood 31.516
Date: Mon, 03 Jun 2024 AIC -53.032
Time: 19:39:45 BIC -43.782
Sample: 0 HQIC -49.551
- 60
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.7297 1.883 0.388 0.698 -2.961 4.421
ma.L1 -0.6907 1.923 -0.359 0.720 -4.460 3.079
ar.S.L12 -0.5207 0.187 -2.783 0.005 -0.887 -0.154
ma.S.L12 -0.3281 0.380 -0.863 0.388 -1.073 0.417
sigma2 0.0127 0.004 3.520 0.000 0.006 0.020
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 1.30
Prob(Q): 0.92 Prob(JB): 0.52
Heteroskedasticity (H): 0.72 Skew: 0.29
Prob(H) (two-sided): 0.53 Kurtosis: 3.57
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [76]:
# Plot SARIMA forecast
plt.figure(figsize=(10, 6))
plt.plot(df.index, df['Close'], label='Actual')
plt.plot(pd.date_range(start=df.index[-1], periods=forecast_period + 1, freq='M')[1:], sarima_forecast, label='SARIMA Forecast', linestyle='--')
plt.title('SARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.legend()
plt.show()
In [74]:
# Forecast with SARIMA model
forecast_period = 12 # Example: forecast for the next 12 periods
sarima_forecast = sarima_model.forecast(steps=forecast_period)
# Calculate accuracy metrics for SARIMA model
sarima_mae = mean_absolute_error(df['Close'][-forecast_period:], sarima_forecast)
sarima_mse = mean_squared_error(df['Close'][-forecast_period:], sarima_forecast)
sarima_rmse = np.sqrt(sarima_mse)
print(f'SARIMA Method MAE: {sarima_mae}')
print(f'SARIMA Method MSE: {sarima_mse}')
print(f'SARIMA Method RMSE: {sarima_rmse}')
SARIMA Method MAE: 0.20876680891557578 SARIMA Method MSE: 0.04523539372534637 SARIMA Method RMSE: 0.21268613900615707
In [ ]: